##################################################################
##################################################################
## Replication Material
## Tobias Widmann:  Do Politicians Appeal to Discrete Emotions? The Effect of Wind Turbine Construction on Elite Discourse 
## Journal of Politics
## widmann@ps.au.dk
##
## Script 002: Replication of the Main Analysis
##################################################################
##################################################################

# Note: The file 000_README.pdf describes all scripts and datasets required to replicate the analysis

# This script was run on the following R version, platform and OS:
# R version 4.0.5 (2021-03-31)
# Platform: x86_64-apple-darwin17.0 (64-bit)
# Running under: macOS 13.5.2


#### Set Working Directory to the Replication Folder# ###########################

# Set the directory to the replication folder
setwd("./files")

#### Install & Load Packages ##############################################################

install.packages("devtools", version = "2.4.0")
install.packages("groundhog", version = "3.1.1")

library("groundhog")
pckgs <- c("stm", "rgdal", "mapproj")
groundhog.library(pckgs, "2021-05-01")

devtools::install_version("tidyverse", version = "1.3.1")
devtools::install_version("tm", version = "0.7-8")
devtools::install_version("Rtsne", version = "0.15")
devtools::install_version("geometry", version = "0.4.5")
devtools::install_version("rsvd", version = "1.0.5")
devtools::install_version("dplyr", version = "1.0.5")
devtools::install_version("openxlsx", version = "4.2.3")
devtools::install_version("tidytext", version = "0.3.1")
devtools::install_version("quanteda", version = "3.0.0")
devtools::install_version("plm", version = "2.4-1")
devtools::install_version("lmtest", version = "0.9-40")
devtools::install_version("dotwhisker", version = "0.7.4")
devtools::install_version("pbkrtest", version = "0.5.1")
devtools::install_version("car", version = "3.1-2")
devtools::install_version("rstatix", version = "0.7.2")
devtools::install_version("ggpubr", version = "0.4.0")

library(tidyverse)
library(tm)
library(Rtsne)
library(geometry)
library(rsvd)
library(dplyr)
library(openxlsx)
library(tidytext)
library(quanteda)
library(plm)
library(lmtest)
library(dotwhisker)
library(pbkrtest)
library(car)
library(rstatix)
library(ggpubr)

#### Load Data ########################################################################
load("./parl_speech.Rdata")
load("./wind_data.Rdata")
load("./btw_data.Rdata")
load("./wind_data_unique.Rdata")
btw_raw <- readOGR("./Geometrie_Wahlkreise_19DBT_geo.shp", verbose = FALSE) 

#### Figure 1 #########################################################################

# Convert the data stored in the 'btw_raw' spatial object into a data frame
# and add row names as a new column named 'id'.
btw_map_dat <- btw_raw@data %>% 
  as.data.frame() %>% 
  rownames_to_column("id")

# Transform the coordinate reference system of 'btw_raw' to WGS84 (commonly used for GPS).
btw_map_wt <- spTransform(btw_raw, CRS("+proj=longlat +datum=WGS84"))

# Convert the spatial object to a format suitable for ggplot and merge it with 'btw_map_dat' using 'id' as the key.
btw_map <- fortify(btw_map_wt) %>% 
  left_join(btw_map_dat, by = "id")

# Filter the data for the year "BTW 2017", join it with 'btw_map' after renaming and converting the 'WKR_NR' column, and store the result in 'btw_map17'.
btw_map17 <- btw_data %>% 
  filter(year == "BTW 2017") %>% 
  inner_join(btw_map %>% 
               rename(wahlkreis_nr = WKR_NR) %>% 
               mutate(wahlkreis_nr = as.character(wahlkreis_nr) %>% as.integer))

# Add a new column 'year' to 'btw_map' with a constant value of "BTW 2017".
btw_map <- btw_map17 %>% 
  mutate(year = "BTW 2017")

# Convert the 'electoral district' column (wahlkreis) of 'wind_data_unique' into a list and store it in 'wks_list'.
wks_list <- as.list(wind_data_unique$wahlkreis)

# Create a new column 'treat' in 'btw_map' and initialize it with zeros.
btw_map$treat <- 0
# Update the 'treat' column to 1 for rows where 'wahlkreis_nr' is in 'wks_list'.
btw_map$treat[btw_map$wahlkreis_nr %in% wks_list] <- 1

# Initialize a new column 'count' in 'btw_map' with zeros.
btw_map$count <- 0
# Extract the 1st and 10th columns of 'parl_speeches' into 'count_data'.
count_data <- ws_parl[,c(1,10)]
# Sort 'count_data' by 'wkindirekt' and descending order of the absolute value of 'count'.
count_data <- count_data[order(count_data$wkindirekt, -abs(count_data$count)), ] 
# Keep only the distinct rows of 'count_data' based on 'wkindirekt' and retain all columns.
count_data <- count_data %>% distinct(wkindirekt, .keep_all = T)
# Filter out rows from 'count_data' where 'wkindirekt' is NA.
count_data <- count_data[!is.na(count_data$wkindirekt),]

# For each 'wkindirekt' in 'count_data', update the corresponding 'count' value in 'btw_map'.
for (i in count_data$wkindirekt){
  btw_map$count[btw_map$wahlkreis_nr==i] <- count_data$count[count_data$wkindirekt==i]
}

# Create a binary map visualization based on the 'treat' column of 'btw_map'.
map_binary <- btw_map %>% 
  ggplot(aes(fill = as.factor(treat))) +
  geom_map(map = btw_map17,
           aes(x = long, y = lat, group = group, map_id = id),
           color = "black", size = 0.4)  + 
  theme_void() +
  coord_map() +
  scale_fill_manual(name = "Binary", values = c("blue", "coral"))

# Create a map visualization based on the 'count' column of 'btw_map'.
map_count <- btw_map %>% 
  ggplot(aes(fill = count)) +
  geom_map(map = btw_map17,
           aes(x = long, y = lat, group = group, map_id = id),
           color = "black", size = 0.4)  + 
  theme_void() +
  coord_map() +
  scale_fill_gradient("Count", low = "blue", high = "coral")

# Arrange the two maps side by side in a single plot.
ggarrange(map_binary, map_count,
          ncol = 2, nrow = 1)

# Exporting figure to .pdf
ggsave("./figures/figure1.pdf")

#### Figure 2 ##########################################################################

df56_party <- ws_parl %>%
  group_by(party) %>%
  dplyr::summarize(t_prop = mean(topic56, na.rm = T), # mean topic proportion
                   sd_tp = sd(topic56, na.rm = TRUE), # standard deviation
                   n_tp = n()) %>% # count
  mutate(se_tp = sd_tp / sqrt(n_tp), # standard error
         lower.ci = t_prop - qt(1 - (0.05 / 2), n_tp - 1) * se_tp, # lower confidence interval
         upper.ci = t_prop + qt(1 - (0.05 / 2), n_tp - 1) * se_tp) # upper confidence interval

# Create a horizontal plot of the topic proportion for each party, with error bars representing the confidence intervals.
plot56_party <- df56_party %>%
  ggplot(aes(t_prop, as.factor(party))) +
  geom_point(color = "#CC6666") + # points for topic proportion
  geom_errorbarh(aes(xmin = lower.ci, xmax = upper.ci), height = .1, color = "#CC6666") + # horizontal error bars
  theme_bw() + # black and white theme
  labs(x = "Topic Proportion (Topic 56)", y = "Party")

# Group the 'parl_speeches' data by 'treat_indirect' and calculate the same statistics as above.
df56 <- ws_parl %>%
  group_by(treat_indirect) %>%
  dplyr::summarize(t_prop = mean(topic56, na.rm = T),
                   sd_tp = sd(topic56, na.rm = TRUE),
                   n_tp = n()) %>%
  mutate(se_tp = sd_tp / sqrt(n_tp),
         lower.ci = t_prop - qt(1 - (0.05 / 2), n_tp - 1) * se_tp,
         upper.ci = t_prop + qt(1 - (0.05 / 2), n_tp - 1) * se_tp)

# Create a horizontal plot of the topic proportion for each treatment group, with error bars.
plot56 <- df56 %>%
  ggplot(aes(t_prop, as.factor(treat_indirect))) +
  geom_point(color = "#CC6666") +
  geom_errorbarh(aes(xmin = lower.ci, xmax = upper.ci), height = .1, color = "#CC6666") +
  theme_bw() +
  labs(x = "Topic Proportion (Topic 56)", y = "Treatment")

# Arrange the two plots side by side in a single display.
ggarrange(plot56_party, plot56,
          ncol = 2, nrow = 1)

# Exporting figure to .pdf
ggsave("./figures/figure2.pdf")
#### Figure 3 #####################################################################


# Start by assigning parl_speeches data to a new dataframe for analysis.
df_analysis <- ws_parl

# Convert the 'date2' column to numeric and subtract 17434.
df_analysis$date3 <- as.numeric(df_analysis$date2) - 17434

# Initialize counters k and j.
k <- 1
j <- 0

# Create a new 'months' column in df_analysis, setting it to NULL initially.
df_analysis$months <- NULL

# Loop through a sequence of numbers to assign a month value based on the 'date3' column.
for (i in seq(from=30, to=1200, by=30)){
  df_analysis$months[df_analysis$date3 < i & df_analysis$date3 > j] <- k
  j <- i
  k <- k + 1
}

# Filter df_analysis to remove rows where 'wkindirekt' is NA.
df_analysis2 <- df_analysis[!is.na(df_analysis$wkindirekt),]

# Filter df_analysis2 to keep rows where 'topic' is either 56 or 16.
df3 <- df_analysis2[df_analysis2$topic == 56 | df_analysis2$topic == 16,]

# Remove spaces from the 'party' column and also remove any trailing whitespace.
df_analysis2$party <- gsub(" ", "", df_analysis2$party)
df_analysis2$party <- gsub("(*UCP)\\s*", "", df_analysis2$party, perl = TRUE)

# Create subsets of df3 for each party, filtering based on the 'treat_indirect' column.
afd <- df3[!(df3$party != "AfD" & df3$treat_indirect == 1),]
green <- df3[!(df3$party != "Greens" & df3$treat_indirect == 1),]
spd <- df3[!(df3$party != "SPD" & df3$treat_indirect == 1),]
linke <- df3[!(df3$party != "The Left" & df3$treat_indirect == 1),]
cdu <- df3[!(df3$party != "CDU/CSU" & df3$treat_indirect == 1),]
fdp <- df3[!(df3$party != "FDP" & df3$treat_indirect == 1),]

# The following blocks of code aggregate the data for each party and a specific emotion ('el.anger' or 'el.disgust'), 
# A two-way fixed effects model is then estimated using the 'plm' package. 
# The results of the model are extracted and stored in a dataframe.

# Analysis for the Greens party and 'el.anger'.
df_agg <- aggregate(green$el.anger, list(green$name, green$months, green$count, green$party), mean)
# Rename columns for clarity.
colnames(df_agg) <- c("name", "date", "treat_indirect", "party", "anger")

# Sort data and keep distinct rows.
df_agg <- df_agg[order(df_agg$name, -abs(df_agg$treat_indirect)), ]
df_agg <- df_agg %>% distinct(name, date, .keep_all = T)

# Estimate the fixed effects model.
fixed1 <- plm(anger ~ treat_indirect, model="within", index = c("name", "date"), 
              effect = "twoways", data = df_agg)

# Robust standard errors are calculated using heteroskedasticity-consistent (HC0) covariance matrix estimation. Clustering is applied on the 'group' variable to account for potential correlations within groups.
model1 <- coeftest(fixed1, vcov=vcovHC(fixed1, type="HC0", cluster="group"))

# Extract model results.
dfdf1 <- broom::tidy(model1)
dfdf1$model <- "Anger"
dfdf1$term <- "Greens"

#Repeat for Disgust and other parties
df_agg <- aggregate(green$el.disgust, list(green$name, green$months, green$count, green$party), mean)
colnames(df_agg)[1] <- "name"
colnames(df_agg)[2] <- "date"
colnames(df_agg)[3] <- "treat_indirect"
colnames(df_agg)[4] <- "party"
colnames(df_agg)[5] <- "disgust"

df_agg <- df_agg[order(df_agg$name, -abs(df_agg$treat_indirect) ), ] ### sort first
df_agg <- df_agg %>% distinct(name, date, .keep_all = T)

df_agg$disgust2 <- scale(df_agg$disgust)

fixed2 <- plm(disgust ~ treat_indirect, model="within", index = c("name", "date"), 
              effect = "twoways", data = df_agg)
model2 <- coeftest(fixed2, vcov=vcovHC(fixed2,type="HC0",cluster="group"))

dfdf2 <- broom::tidy(model2)
dfdf2$model <- "Disgust"
dfdf2$term <- "Greens"


df_agg <- aggregate(afd$el.anger, list(afd$name, afd$months, afd$count, afd$party), mean)
colnames(df_agg)[1] <- "name"
colnames(df_agg)[2] <- "date"
colnames(df_agg)[3] <- "treat_indirect"
colnames(df_agg)[4] <- "party"
colnames(df_agg)[5] <- "anger"

df_agg <- df_agg[order(df_agg$name, -abs(df_agg$treat_indirect) ), ] ### sort first
df_agg <- df_agg %>% distinct(name, date, .keep_all = T)



fixed3 <- plm(anger ~ treat_indirect, model="within", index = c("name", "date"), 
              effect = "twoways", data = df_agg)
model3 <- coeftest(fixed3, vcov=vcovHC(fixed3,type="HC0",cluster="group"))

dfdf3 <- broom::tidy(model3)
dfdf3$model <- "Anger"
dfdf3$term <- "AfD"

df_agg <- aggregate(afd$el.disgust, list(afd$name, afd$months, afd$count, afd$party), mean)
colnames(df_agg)[1] <- "name"
colnames(df_agg)[2] <- "date"
colnames(df_agg)[3] <- "treat_indirect"
colnames(df_agg)[4] <- "party"
colnames(df_agg)[5] <- "disgust"

df_agg <- df_agg[order(df_agg$name, -abs(df_agg$treat_indirect) ), ] ### sort first
df_agg <- df_agg %>% distinct(name, date, .keep_all = T)

df_agg$disgust2 <- scale(df_agg$disgust)

fixed4 <- plm(disgust ~ treat_indirect, model="within", index = c("name", "date"), 
              effect = "twoways", data = df_agg)
model4 <- coeftest(fixed4, vcov=vcovHC(fixed4,type="HC0",cluster="group"))

dfdf4 <- broom::tidy(model4)
dfdf4$model <- "Disgust"
dfdf4$term <- "AfD"

df_agg <- aggregate(cdu$el.anger, list(cdu$name, cdu$months, cdu$count, cdu$party), mean)
colnames(df_agg)[1] <- "name"
colnames(df_agg)[2] <- "date"
colnames(df_agg)[3] <- "treat_indirect"
colnames(df_agg)[4] <- "party"
colnames(df_agg)[5] <- "anger"

df_agg <- df_agg[order(df_agg$name, -abs(df_agg$treat_indirect) ), ] ### sort first
df_agg <- df_agg %>% distinct(name, date, .keep_all = T)



fixed5 <- plm(anger ~ treat_indirect, model="within", index = c("name", "date"), 
              effect = "twoways", data = df_agg)
model5 <- coeftest(fixed5, vcov=vcovHC(fixed5,type="HC0",cluster="group"))

dfdf5 <- broom::tidy(model5)
dfdf5$model <- "Anger"
dfdf5$term <- "CDU/CSU"

df_agg <- aggregate(cdu$el.disgust, list(cdu$name, cdu$months, cdu$count, cdu$party), mean)
colnames(df_agg)[1] <- "name"
colnames(df_agg)[2] <- "date"
colnames(df_agg)[3] <- "treat_indirect"
colnames(df_agg)[4] <- "party"
colnames(df_agg)[5] <- "disgust"

df_agg <- df_agg[order(df_agg$name, -abs(df_agg$treat_indirect) ), ] ### sort first
df_agg <- df_agg %>% distinct(name, date, .keep_all = T)

df_agg$disgust2 <- scale(df_agg$disgust)

fixed6 <- plm(disgust ~ treat_indirect, model="within", index = c("name", "date"), 
              effect = "twoways", data = df_agg)
model6 <- coeftest(fixed6, vcov=vcovHC(fixed6,type="HC0",cluster="group"))

dfdf6 <- broom::tidy(model6)
dfdf6$model <- "Disgust"
dfdf6$term <- "CDU/CSU"



df_agg <- aggregate(fdp$el.anger, list(fdp$name, fdp$months, fdp$count, fdp$party), mean)
colnames(df_agg)[1] <- "name"
colnames(df_agg)[2] <- "date"
colnames(df_agg)[3] <- "treat_indirect"
colnames(df_agg)[4] <- "party"
colnames(df_agg)[5] <- "anger"

df_agg <- df_agg[order(df_agg$name, -abs(df_agg$treat_indirect) ), ] ### sort first
df_agg <- df_agg %>% distinct(name, date, .keep_all = T)



fixed7 <- plm(anger ~ treat_indirect, model="within", index = c("name", "date"), 
              effect = "twoways", data = df_agg)
model7 <- coeftest(fixed7, vcov=vcovHC(fixed7,type="HC0",cluster="group"))

dfdf7 <- broom::tidy(model7)
dfdf7$model <- "Anger"
dfdf7$term <- "FDP"

df_agg <- aggregate(fdp$el.disgust, list(fdp$name, fdp$months, fdp$count, fdp$party), mean)
colnames(df_agg)[1] <- "name"
colnames(df_agg)[2] <- "date"
colnames(df_agg)[3] <- "treat_indirect"
colnames(df_agg)[4] <- "party"
colnames(df_agg)[5] <- "disgust"

df_agg <- df_agg[order(df_agg$name, -abs(df_agg$treat_indirect) ), ] ### sort first
df_agg <- df_agg %>% distinct(name, date, .keep_all = T)

df_agg$disgust2 <- scale(df_agg$disgust)

fixed8 <- plm(disgust ~ treat_indirect, model="within", index = c("name", "date"), 
              effect = "twoways", data = df_agg)
model8 <- coeftest(fixed8, vcov=vcovHC(fixed8,type="HC0",cluster="group"))

dfdf8 <- broom::tidy(model8)
dfdf8$model <- "Disgust"
dfdf8$term <- "FDP"

df_agg <- aggregate(spd$el.anger, list(spd$name, spd$months, spd$count, spd$party), mean)
colnames(df_agg)[1] <- "name"
colnames(df_agg)[2] <- "date"
colnames(df_agg)[3] <- "treat_indirect"
colnames(df_agg)[4] <- "party"
colnames(df_agg)[5] <- "anger"

df_agg <- df_agg[order(df_agg$name, -abs(df_agg$treat_indirect) ), ] ### sort first
df_agg <- df_agg %>% distinct(name, date, .keep_all = T)



fixed9 <- plm(anger ~ treat_indirect, model="within", index = c("name", "date"), 
              effect = "twoways", data = df_agg)
model9 <- coeftest(fixed9, vcov=vcovHC(fixed9,type="HC0",cluster="group"))

dfdf9 <- broom::tidy(model9)
dfdf9$model <- "Anger"
dfdf9$term <- "SPD"

df_agg <- aggregate(spd$el.disgust, list(spd$name, spd$months, spd$count, spd$party), mean)
colnames(df_agg)[1] <- "name"
colnames(df_agg)[2] <- "date"
colnames(df_agg)[3] <- "treat_indirect"
colnames(df_agg)[4] <- "party"
colnames(df_agg)[5] <- "disgust"

df_agg <- df_agg[order(df_agg$name, -abs(df_agg$treat_indirect) ), ] ### sort first
df_agg <- df_agg %>% distinct(name, date, .keep_all = T)

df_agg$disgust2 <- scale(df_agg$disgust)

fixed10 <- plm(disgust ~ treat_indirect, model="within", index = c("name", "date"), 
               effect = "twoways", data = df_agg)
model10 <- coeftest(fixed10, vcov=vcovHC(fixed10,type="HC0",cluster="group"))

dfdf10 <- broom::tidy(model10)
dfdf10$model <- "Disgust"
dfdf10$term <- "SPD"


df_agg <- aggregate(linke$el.anger, list(linke$name, linke$months, linke$count, linke$party), mean)
colnames(df_agg)[1] <- "name"
colnames(df_agg)[2] <- "date"
colnames(df_agg)[3] <- "treat_indirect"
colnames(df_agg)[4] <- "party"
colnames(df_agg)[5] <- "anger"

df_agg <- df_agg[order(df_agg$name, -abs(df_agg$treat_indirect) ), ] ### sort first
df_agg <- df_agg %>% distinct(name, date, .keep_all = T)



fixed11 <- plm(anger ~ treat_indirect, model="within", index = c("name", "date"), 
               effect = "twoways", data = df_agg)
model11 <- coeftest(fixed11, vcov=vcovHC(fixed11,type="HC0",cluster="group"))

dfdf11 <- broom::tidy(model11)
dfdf11$model <- "Anger"
dfdf11$term <- "The Left"

df_agg <- aggregate(linke$el.disgust, list(linke$name, linke$months, linke$count, linke$party), mean)
colnames(df_agg)[1] <- "name"
colnames(df_agg)[2] <- "date"
colnames(df_agg)[3] <- "treat_indirect"
colnames(df_agg)[4] <- "party"
colnames(df_agg)[5] <- "disgust"

df_agg <- df_agg[order(df_agg$name, -abs(df_agg$treat_indirect) ), ] ### sort first
df_agg <- df_agg %>% distinct(name, date, .keep_all = T)

df_agg$disgust2 <- scale(df_agg$disgust)

fixed12 <- plm(disgust ~ treat_indirect, model="within", index = c("name", "date"), 
               effect = "twoways", data = df_agg)
model12 <- coeftest(fixed12, vcov=vcovHC(fixed12,type="HC0",cluster="group"))

dfdf12 <- broom::tidy(model12)
dfdf12$model <- "Disgust"
dfdf12$term <- "The Left"


# Combine results for 'el.anger' from different parties.
df_anger <- rbind(dfdf1, dfdf3, dfdf5, dfdf7, dfdf9, dfdf11)

# Combine results for 'el.disgust' from different parties.
df_disgust <- rbind(dfdf2, dfdf4, dfdf6, dfdf8, dfdf10, dfdf12)

# Order the dataframes by party.
df_anger$term <- factor(df_anger$term, levels=c("The Left", "Greens", "SPD", "CDU/CSU", "FDP", "AfD"))
df_anger <- df_anger[order(df_anger$term), ]
df_disgust$term <- factor(df_disgust$term, levels=c("The Left", "Greens", "SPD", "CDU/CSU", "FDP", "AfD"))
df_disgust <- df_disgust[order(df_disgust$term), ]

# Create dot-whisker plots for the coefficients of 'el.anger' and 'el.disgust' using the 'jtools' package.
anger_coef <- dwplot(df_anger, show_intercept = TRUE, ci = .95,
                     vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)) %>%
  relabel_predictors(c(afd = "AfD", green = "The Greens", spd = "SPD", cdu = "CDU/CSU", fdp = "FDP", linke = "The Left")) +
  xlab("Coefficient Estimate (Anger)") + ylab("") +
  scale_colour_manual(values = c("#6699cc")) +
  guides(colour = "none") +
  theme_bw()

disgust_coef <- dwplot(df_disgust, show_intercept = TRUE, ci = .95,
                       vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)) %>%
  relabel_predictors(c(afd = "AfD", green = "The Greens", spd = "SPD", cdu = "CDU/CSU", fdp = "FDP", linke = "The Left")) +
  xlab("Coefficient Estimate (Disgust)") + ylab("") +
  scale_colour_manual(values = c("#CC6666")) +
  guides(colour = "none") +
  theme_bw()

# Combine the two plots side by side.
ggpubr::ggarrange(anger_coef, disgust_coef)

# Exporting figure to .pdf
ggsave("./figures/figure3.pdf")

